# Training a hyper-parameter tuned Xg-Boost regressor on our train data
# find more about XGBRegressor function here
# http://xgboost.readthedocs.io/en/latest/python/python_api.html?#module-xgboost.sklearn
# -------------------------
# default parameters
# xgboost.XGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective='reg:linear',
# booster='gbtree', n_jobs=1, nthread=None, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1,
# colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, random_state=0, seed=None,
# missing=None, **kwargs)
# some of the methods of XGBRegressor()
# fit(X, y, sample_weight=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None)
# get_params([deep]) Get parameters for this estimator.
# predict(data, output_margin=False, ntree_limit=0) : Predict with data. NOTE: This function is not thread safe.
# get_score(importance_type='weight') -> get the feature importance
# -----------------------
def xg_reg(df_train, df_test, train_output):
    """Tune an XGBoost regressor via randomized search and predict.

    Runs a 3-fold RandomizedSearchCV (scored by negative MAE) over common
    XGBRegressor hyper-parameters, fits on the training data, prints the
    best parameter combination found, and returns integer-rounded
    predictions for both the train and test sets.

    Parameters
    ----------
    df_train : feature matrix used to fit the search.
    df_test : feature matrix to predict on.
    train_output : target values aligned with df_train.

    Returns
    -------
    (xgb_train_predictions, xgb_test_predictions) : tuple of lists of
        rounded predictions for the train and test sets respectively.
    """
    # Distributions sampled by the randomized search; uniform(loc, scale)
    # draws from [loc, loc + scale].
    c_param = {'learning_rate': stats.uniform(0.01, 0.2),
               'n_estimators': sp_randint(100, 1000),
               'max_depth': sp_randint(1, 10),
               'min_child_weight': sp_randint(1, 8),
               'gamma': stats.uniform(0, 0.02),
               'subsample': stats.uniform(0.6, 0.4),
               'reg_alpha': sp_randint(0, 200),
               'reg_lambda': stats.uniform(0, 200),
               'colsample_bytree': stats.uniform(0.6, 0.3)}
    # n_jobs is the supported sklearn-wrapper parameter; `nthread` is the
    # deprecated alias for the same setting (4 parallel threads).
    xreg = xgb.XGBRegressor(n_jobs=4)
    model3 = RandomizedSearchCV(xreg, param_distributions=c_param,
                                scoring="neg_mean_absolute_error", cv=3)
    model3.fit(df_train, train_output)
    # refit=True (the default) refits the best estimator on the full
    # training set, so model3.predict uses the tuned model.
    xgb_test_predictions = [round(value) for value in model3.predict(df_test)]
    xgb_train_predictions = [round(value) for value in model3.predict(df_train)]
    print(model3.best_params_)
    return xgb_train_predictions, xgb_test_predictions